Rem
Rem $Header: rdbms/demo/dmnmdemo.sql /main/16 2012/04/15 16:31:56 xbarr Exp $
Rem
Rem dmnmdemo.sql
Rem
Rem Copyright (c) 2003, 2012, Oracle and/or its affiliates. 
Rem All rights reserved. 
Rem
Rem    NAME
Rem      dmnmdemo.sql - Sample program for the DBMS_DATA_MINING package.
Rem
Rem    DESCRIPTION
Rem      This script creates a feature extraction model
Rem      using the NMF algorithm
Rem      and data in the SH (Sales History) schema in the RDBMS. 
Rem
Rem    NOTES
Rem     
Rem
Rem    MODIFIED   (MM/DD/YY) 
Rem    yincwang    03/22/12 - use auto data prep
Rem    amozes      01/23/12 - updates for 12c
Rem    xbarr       01/09/12 - add feature_details demo
Rem    xbarr       12/01/10 - modified case 3 and removed type creation
Rem    xbarr       10/20/10 - binary_double results reformating
Rem    ramkrish    06/14/07 - remove commit after settings
Rem    jiawang     01/17/08 - Correct comments
Rem    ramkrish    10/25/07 - replace deprecated get_model calls with catalog
Rem                           queries
Rem    jiawang     03/06/07 - Add order by to fix bug5381396
Rem    xbarr       02/22/07 - add force drop for type objects
Rem    amozes      05/09/06 - repeatable ordering 
Rem    ktaylor     07/11/05 - Minor edits to comments
Rem    ramkrish    02/01/05 - remove rownum clause in model signature 
Rem    jcjeon      01/18/05 - add column format 
Rem    ramkrish    10/27/04 - add data analysis and comments/cleanup
Rem    jiawang     07/22/04 - Add order by to fix sorting dif 
Rem    xbarr       06/25/04 - xbarr_dm_rdbms_migration
Rem    ramkrish    10/20/03 - ramkrish_txn109085
Rem    pstengar    10/17/03 - added denormalization of model details
Rem    cbhagwat    10/17/03 - feature_extraction
Rem    ramkrish    10/02/03 - Creation
  
-- SQL*Plus session settings for the demo run: show DBMS_OUTPUT text,
-- trim trailing blanks from spooled lines, use 10000-line pages and
-- 100-character lines, and echo each command as it is executed.
SET SERVEROUTPUT ON
SET TRIMSPOOL ON
SET PAGESIZE 10000
SET LINESIZE 100
SET ECHO ON

-----------------------------------------------------------------------
--                            SAMPLE PROBLEM
-----------------------------------------------------------------------
-- Given demographic data about a set of customers, extract features
-- from the given dataset.
--

-----------------------------------------------------------------------
--                            BUILD THE MODEL
-----------------------------------------------------------------------

-- Clean up the settings table left behind by a previous run.
-- Only ORA-00942 (table or view does not exist) is expected on a first
-- run and is ignored; any other failure is re-raised so that a real
-- problem is not silently hidden.
BEGIN
  EXECUTE IMMEDIATE 'DROP TABLE nmf_sh_sample_settings';
EXCEPTION
  WHEN OTHERS THEN
    IF SQLCODE != -942 THEN
      RAISE;
    END IF;
END;
/
-- Clean up a model with the same name left behind by a previous run.
-- A missing model (ORA-40203 / ORA-40284, "model does not exist") is the
-- expected first-run condition and is ignored; any other failure is
-- re-raised so that a real problem is not silently hidden.
BEGIN
  DBMS_DATA_MINING.DROP_MODEL('NMF_SH_sample');
EXCEPTION
  WHEN OTHERS THEN
    IF SQLCODE NOT IN (-40203, -40284) THEN
      RAISE;
    END IF;
END;
/

-------------------
-- SPECIFY SETTINGS
--
-- CREATE A SETTINGS TABLE
--
-- NMF is the default Feature Extraction algorithm. For this sample,
-- we use Automatic Data Preparation (ADP).
--
-- Settings table: one (setting_name, setting_value) row per setting.
-- Command echo is switched off around the DDL and restored afterwards.
SET ECHO OFF
CREATE TABLE nmf_sh_sample_settings
(
  setting_name   VARCHAR2(30),
  setting_value  VARCHAR2(4000)
);
SET ECHO ON
 
BEGIN
  -- Single settings row: switch automatic data preparation on.
  INSERT INTO nmf_sh_sample_settings
    (setting_name, setting_value)
  VALUES
    (dbms_data_mining.prep_auto, dbms_data_mining.prep_auto_on);
  -- Further settings could be overridden with additional rows, e.g.:
  --   (dbms_data_mining.feat_num_features,   10)
  --   (dbms_data_mining.nmfs_conv_tolerance, 0.05)
  --   (dbms_data_mining.nmfs_num_iterations, 50)
  --   (dbms_data_mining.nmfs_random_seed,    -1)
END;
/

---------------------
-- CREATE A NEW MODEL
--
-- Build NMF model
DECLARE
  -- Names of the objects involved in the model build.
  l_model     CONSTANT VARCHAR2(30) := 'NMF_SH_sample';
  l_build_src CONSTANT VARCHAR2(30) := 'mining_data_build_v';
  l_case_id   CONSTANT VARCHAR2(30) := 'cust_id';
  l_settings  CONSTANT VARCHAR2(30) := 'nmf_sh_sample_settings';
BEGIN
  -- Feature extraction model built from the build view, keyed by the
  -- case id column, using the settings table populated for this sample.
  dbms_data_mining.create_model(
    model_name          => l_model,
    mining_function     => dbms_data_mining.feature_extraction,
    data_table_name     => l_build_src,
    case_id_column_name => l_case_id,
    settings_table_name => l_settings);
END;
/

-------------------------
-- DISPLAY MODEL SETTINGS
--
-- List the settings recorded for the model, ordered by setting name.
-- The model created as 'NMF_SH_sample' is looked up by its upper-case name.
COLUMN setting_name  FORMAT a30
COLUMN setting_value FORMAT a30
SELECT
    s.setting_name,
    s.setting_value
FROM user_mining_model_settings s
WHERE s.model_name = 'NMF_SH_SAMPLE'
ORDER BY s.setting_name;

--------------------------
-- DISPLAY MODEL SIGNATURE
--
-- List the attributes (name and type) recorded for the model,
-- ordered by attribute name.
COLUMN attribute_name FORMAT a40
COLUMN attribute_type FORMAT a20
SELECT
    a.attribute_name,
    a.attribute_type
FROM user_mining_model_attributes a
WHERE a.model_name = 'NMF_SH_SAMPLE'
ORDER BY a.attribute_name;

------------------------
-- DISPLAY MODEL DETAILS
--
-- Each feature is a linear combination of the original attribute set; 
-- the coefficients of these linear combinations are non-negative.
-- The model details return for each feature the coefficients
-- associated with each one of the original attributes. Categorical 
-- attributes are described by (attribute_name, attribute_value) pairs.
-- That is, for a given feature, each distinct value of a categorical 
-- attribute has its own coefficient.
--
-- Show the coefficients of feature 1 for three selected attributes.
-- Each feature row is unnested into its attribute_set collection, giving
-- one output row per (attribute_name, attribute_value) pair.
COLUMN attribute_name  FORMAT a20
COLUMN attribute_value FORMAT a60
SELECT
    f.feature_id,
    a.attribute_name,
    a.attribute_value,
    a.coefficient
FROM TABLE(dbms_data_mining.get_model_details_nmf('NMF_SH_Sample')) f,
     TABLE(f.attribute_set) a
WHERE f.feature_id = 1
  AND a.attribute_name IN ('AFFINITY_CARD', 'AGE', 'COUNTRY_NAME')
ORDER BY
    f.feature_id,
    a.attribute_name,
    a.attribute_value;

-----------------------------------------------------------------------
--                               TEST THE MODEL
-----------------------------------------------------------------------
-- There is no specific set of testing parameters for feature extraction.
-- Examination and analysis of features is the main method to prove
-- the efficacy of an NMF model.
--

-----------------------------------------------------------------------
--                               APPLY THE MODEL
-----------------------------------------------------------------------
--
-- For a descriptive mining function like feature extraction, "Scoring"
-- involves providing the probability values for each feature.
-- During model apply, an NMF model maps the original data into the 
-- new set of attributes (features) discovered by the model.
-- 

-------------------------------------------------
-- SCORE NEW DATA USING SQL DATA MINING FUNCTIONS
--
------------------
-- BUSINESS CASE 1
-- List the features that correspond to customers in this dataset.
-- The feature that is returned for each row is the one with the
-- largest value based on the inputs for that row.
-- Count the number of rows that have the same "largest" feature value.
--
-- Score every row of the apply view once, then count the rows per
-- returned feature; most frequent feature first, ties by feature id
-- descending.
WITH scored AS (
    SELECT FEATURE_ID(nmf_sh_sample USING *) AS feat
    FROM mining_data_apply_v
)
SELECT
    feat,
    COUNT(*) AS cnt
FROM scored
GROUP BY feat
ORDER BY cnt DESC, feat DESC;

------------------
-- BUSINESS CASE 2
-- List top (largest) 3 features that represent a customer (100002).
-- Explain the attributes which most impact those features.
--
-- Widen the output and allow long values so the FEATURE_DETAILS result
-- is not truncated.
SET LINESIZE 120
COLUMN fid FORMAT 999
COLUMN val FORMAT 999.999
SET LONG 20000
-- For customer 100002: compute the top 3 features with FEATURE_SET,
-- unnest them, and attach the 5 most influential attributes of each
-- feature via FEATURE_DETAILS.
-- Rows are ordered by the named feature value (not by select-list
-- position), with feature_id as a tie-breaker so the output is
-- repeatable when two features have the same value.
SELECT
    s.feature_id AS fid,
    s.value      AS val,
    FEATURE_DETAILS(nmf_sh_sample, s.feature_id, 5 USING t.*) AS det
FROM (SELECT v.*,
             FEATURE_SET(nmf_sh_sample, 3 USING *) AS fset
        FROM mining_data_apply_v v
       WHERE v.cust_id = 100002) t,
     TABLE(t.fset) s
ORDER BY s.value DESC, s.feature_id;

-----------------------------------------------------------------------
--    BUILD and APPLY a transient model using analytic functions
-----------------------------------------------------------------------
-- In addition to creating a persistent model that is stored as a schema
-- object, models can be built and scored on data on the fly using
-- Oracle's analytic function syntax.

----------------------
-- BUSINESS CASE 3
-- 
-- Map customer attributes into six features and return the feature
-- mapping for customer 100001.
-- All data in the apply view is used to construct the feature mappings.
-- All necessary data preparation steps are automatically performed.
-- The analytic FEATURE_SET (INTO 6 ... OVER ()) is evaluated over all
-- rows of the apply view; the resulting feature set of customer 100001
-- is then unnested and listed by feature id.
COLUMN feature_id FORMAT 999
COLUMN value      FORMAT 999.999
SELECT
    f.feature_id,
    f.value
FROM (SELECT a.cust_id,
             FEATURE_SET(INTO 6 USING *) OVER () AS fset
        FROM mining_data_apply_v a) m,
     TABLE(m.fset) f
WHERE m.cust_id = 100001
ORDER BY f.feature_id;
